devtools::install_github('charlie86/spotifyr')
## Skipping install of 'spotifyr' from a github remote, the SHA1 (ea9d985d) has not changed since last install.
##   Use `force = TRUE` to force installation
library(spotifyr)
library(tidyverse)
## ── Attaching packages ────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.0.0     ✔ readr   1.1.1
## ✔ tibble  1.4.2     ✔ purrr   0.2.5
## ✔ tidyr   0.8.1     ✔ stringr 1.3.1
## ✔ ggplot2 3.0.0     ✔ forcats 0.3.0
## ── Conflicts ───────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(knitr)
library(beeswarm)
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
devtools::install_github("josiahparry/geniusR")
## Skipping install of 'geniusR' from a github remote, the SHA1 (11fc6ec7) has not changed since last install.
##   Use `force = TRUE` to force installation
library(geniusR)
## 
## Attaching package: 'geniusR'
## The following objects are masked from 'package:spotifyr':
## 
##     add_genius, gen_album_url, gen_song_url, genius_album,
##     genius_lyrics, genius_tracklist, genius_url, possible_album,
##     possible_lyrics, prep_info
library(dplyr) #data manipulation
library(ggplot2) #visualizations
library(gridExtra) #viewing multiple plots together
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(tidytext) #text mining
library(wordcloud2) #creative visualizations

# API credentials are read from the environment rather than being written
# into this script. Define SPOTIFY_CLIENT_ID, SPOTIFY_CLIENT_SECRET and
# GENIUS_API_TOKEN in ~/.Renviron (or export them in the shell) before running.
# NOTE(review): the literal client id / secret / token that used to live here
# were committed in plain text - treat them as leaked and rotate them.
required_env <- c("SPOTIFY_CLIENT_ID", "SPOTIFY_CLIENT_SECRET")
missing_env <- required_env[!nzchar(Sys.getenv(required_env))]
if (length(missing_env) > 0) {
    stop(
        "Missing required environment variable(s): ",
        paste(missing_env, collapse = ", "),
        call. = FALSE
    )
}
# The Genius token is only warned about: the visible code never passes it to
# a function explicitly, so a missing value may or may not matter.
if (!nzchar(Sys.getenv("GENIUS_API_TOKEN"))) {
    warning("GENIUS_API_TOKEN is not set; Genius lookups may fail", call. = FALSE)
}

# Called without arguments, so the Spotify client id / secret are expected to
# come from the environment variables validated above.
accesstoken <- get_spotify_access_token()

# Spotify Artist : Foo Fighters
# spotify:artist:7jy3rLJdDQY21OgRLCZ9sD

# Extract data from Spotify via spotifyr.
# The artist is looked up by name here; the URI noted above is not used.
ff_albums <- get_artist_albums(artist = 'Foo Fighters')
# Album-level data requested with the album URIs from the previous call.
# ff_meta is not referenced again in the visible part of this script.
ff_meta <- get_albums(albums = ff_albums$album_uri)
# Tracks for the albums found above.
ff_tracks <- get_album_tracks(ff_albums)
# Audio analysis is requested for the first track only (index 1).
ff_audio_analysis <- data.frame(get_track_audio_analysis(ff_tracks$track_uri[1]))
# Audio features for the tracks; later code reads columns such as tempo,
# danceability, energy, loudness and valence from this data frame.
ff_audio_features <- data.frame(get_track_audio_features(ff_tracks))

# Reshape the audio features from wide to long form: the four identifier
# columns are kept as-is and every other column becomes a (variable, value)
# row. Rows for the `key` and `mode` features are dropped because they are
# not of interest, and `value` is coerced to numeric.
# Features expected to remain: danceability, energy, loudness, speechiness,
# acousticness, instrumentalness, liveness, valence, tempo.
ff_audio_melt <- ff_audio_features %>%
    melt(id.vars = c("track_uri", "duration_ms", "time_signature", "key_mode")) %>%
    filter(!(variable %in% c("key", "mode"))) %>%
    mutate(value = as.numeric(value))
# Per-feature minimum and maximum over all rows of ff_audio_features.
# norm_data() reads these globals by name, so the names must stay in sync
# with that function.
# Plain `<-` is used: this is top-level code, so the variables are created in
# the current (global) environment without `<<-` searching enclosing
# environments first.
# min's
min_tempo <- min(ff_audio_features$tempo)
min_dance <- min(ff_audio_features$danceability)
min_energy <- min(ff_audio_features$energy)
min_loudness <- min(ff_audio_features$loudness)
min_speechiness <- min(ff_audio_features$speechiness)
min_acoustic <- min(ff_audio_features$acousticness)
min_instru <- min(ff_audio_features$instrumentalness)
min_valence <- min(ff_audio_features$valence)
min_live <- min(ff_audio_features$liveness)
# max's
max_tempo <- max(ff_audio_features$tempo)
max_dance <- max(ff_audio_features$danceability)
max_energy <- max(ff_audio_features$energy)
max_loudness <- max(ff_audio_features$loudness)
max_speechiness <- max(ff_audio_features$speechiness)
max_acoustic <- max(ff_audio_features$acousticness)
max_instru <- max(ff_audio_features$instrumentalness)
max_valence <- max(ff_audio_features$valence)
max_live <- max(ff_audio_features$liveness)

# Min-max normalise a reading of one audio feature.
#
# Formula: (value - min) / (max - min), where min and max are the min_* /
# max_* globals computed from ff_audio_features.
#
# Arguments:
#   var1   Feature name as a length-one character or factor. One of "tempo",
#          "liveness", "danceability", "energy", "loudness", "speechiness",
#          "acousticness", "instrumentalness", "valence".
#   x_val  Numeric reading(s) of that feature.
#
# Returns: x_val rescaled so that the feature's minimum maps to 0 and its
# maximum to 1. The result is NaN or Inf when the feature's max equals its min.
#
# Errors: stops with a descriptive message when var1 is not a single known
# feature name (this used to surface as "object 'norm_val' not found").
norm_data <- function(var1, x_val) {
    # as.character() so that factor input (such as the `variable` column of
    # the melted data) is matched by label rather than by integer code.
    feature <- as.character(var1)
    if (length(feature) != 1L || is.na(feature)) {
        stop("`var1` must be a single, non-missing feature name", call. = FALSE)
    }
    # switch() evaluates only the matching arm, so only the globals of the
    # requested feature are touched. The unnamed last arm is the fallback.
    bounds <- switch(feature,
        tempo = c(min_tempo, max_tempo),
        liveness = c(min_live, max_live),
        danceability = c(min_dance, max_dance),
        energy = c(min_energy, max_energy),
        loudness = c(min_loudness, max_loudness),
        speechiness = c(min_speechiness, max_speechiness),
        acousticness = c(min_acoustic, max_acoustic),
        instrumentalness = c(min_instru, max_instru),
        valence = c(min_valence, max_valence),
        stop("Unknown audio feature: ", feature, call. = FALSE)
    )
    (x_val - bounds[1]) / (bounds[2] - bounds[1])
}

# Normalise every reading in ff_audio_melt with norm_data() and store the
# result in a new `norm_value` column.
# vapply() builds the whole column in one assignment and checks that each
# call yields exactly one number. seq_len() makes this a no-op for an empty
# data frame, where `1:nrow()` would iterate over c(1, 0).
ff_audio_melt$norm_value <- vapply(
    seq_len(nrow(ff_audio_melt)),
    function(row) norm_data(ff_audio_melt$variable[row], ff_audio_melt$value[row]),
    numeric(1)
)

# Order the audio features by the median of their normalised values
# (ascending); the resulting vector fixes the x-axis order of the box plot.
feature_medians <- summarise(group_by(ff_audio_melt, variable), m = median(norm_value))
ff_sorted <- arrange(feature_medians, m)[["variable"]]

# Box plot of the normalised audio features: one box per feature, coloured
# by feature, with the individual observations drawn as jittered points.
p <- plot_ly(
    data = ff_audio_melt,
    x = ~factor(variable, ff_sorted),
    y = ~norm_value,
    color = ~variable,
    type = "box",
    boxpoints = "all",
    jitter = 0.3,
    pointpos = -1.8
)
p
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
# Lyrics Analysis - utilising GeniusR library
# Fetch the lyrics of every track in ff_tracks. Each element of
# lyrics_by_track is either the lyrics for that track, tagged with `song_id`
# (the track's row index in ff_tracks), or NULL when the lookup failed or
# returned no rows.
lyrics_by_track <- lapply(seq_len(nrow(ff_tracks)), function(row) {
    # Best-effort lookup: a track that cannot be resolved is skipped.
    # The fallback is the handler's return value. Setting a flag inside the
    # handler, as the earlier code did, only changes a variable local to it.
    lyrics <- tryCatch(
        genius_lyrics(artist = "Foo Fighters", song = ff_tracks$track_name[row]),
        error = function(e) NULL
    )
    if (is.null(lyrics) || nrow(lyrics) == 0) {
        return(NULL)
    }
    lyrics$song_id <- row
    lyrics
})

# Row indices (into ff_tracks) of the tracks for which lyrics were retrieved.
song_ids <- which(!vapply(lyrics_by_track, is.null, logical(1)))
if (length(song_ids) == 0) {
    stop("No lyrics could be retrieved for any track", call. = FALSE)
}
# Bind once at the end instead of growing the data frame inside the loop.
# This no longer depends on the first track being the first success.
lyrics_ds <- do.call(rbind, lyrics_by_track[song_ids])
## Warning in request_GET(session, url): Not Found (HTTP 404).
## Warning in request_GET(session, url): Not Found (HTTP 404).

## Warning in request_GET(session, url): Not Found (HTTP 404).

## Warning in request_GET(session, url): Not Found (HTTP 404).

## Warning in request_GET(session, url): Not Found (HTTP 404).

## Warning in request_GET(session, url): Not Found (HTTP 404).

## Warning in request_GET(session, url): Not Found (HTTP 404).

## Warning in request_GET(session, url): Not Found (HTTP 404).

## Warning in request_GET(session, url): Not Found (HTTP 404).

## Warning in request_GET(session, url): Not Found (HTTP 404).

## Warning in request_GET(session, url): Not Found (HTTP 404).

## Warning in request_GET(session, url): Not Found (HTTP 404).

## Warning in request_GET(session, url): Not Found (HTTP 404).

## Warning in request_GET(session, url): Not Found (HTTP 404).

## Warning in request_GET(session, url): Not Found (HTTP 404).

## Warning in request_GET(session, url): Not Found (HTTP 404).

## Warning in request_GET(session, url): Not Found (HTTP 404).

## Warning in request_GET(session, url): Not Found (HTTP 404).

## Warning in request_GET(session, url): Not Found (HTTP 404).

## Warning in request_GET(session, url): Not Found (HTTP 404).

## Warning in request_GET(session, url): Not Found (HTTP 404).

## Warning in request_GET(session, url): Not Found (HTTP 404).
# Sentiment Analysis
# Split the lyrics into one row per word, keeping song_id alongside.
# NOTE(review): the result presumably stays grouped by song_id; the later
# count()/arrange(song_id, ...) steps depend on song_id surviving - confirm.
l_analyse <- lyrics_ds %>%
    select(lyric, song_id) %>%
    group_by(song_id) %>%
    unnest_tokens(word, lyric)

# Sentiment lexicons. The NRC lexicon is loaded once and the joy / anger
# subsets are derived from that copy instead of re-loading it for each.
nrc_sent <- get_sentiments("nrc")
nrc_joy <- nrc_sent %>%
    filter(sentiment == "joy")
nrc_anger <- nrc_sent %>%
    filter(sentiment == "anger")
afinn_sent <- get_sentiments("afinn")
bing_sent <- get_sentiments("bing")

# Count the words that the NRC lexicon tags as "joy".
# The join key is spelled out so the join cannot silently start matching on
# another shared column.
words_joy <- l_analyse %>%
    inner_join(nrc_joy, by = "word") %>%
    count(word, sort = TRUE)
# Same for the words tagged as "anger".
words_anger <- l_analyse %>%
    inner_join(nrc_anger, by = "word") %>%
    count(word, sort = TRUE)
# Stack both tables.
# NOTE(review): a word present in both subsets ends up in total_words twice
# (once per table) - confirm that downstream tallies should count it twice.
total_words <- rbind(words_joy, words_anger)

# Cross-reference the joy AND anger words (total_words) against each
# sentiment lexicon and tally the matching rows.
# - Join keys are explicit so a join cannot silently pick up another shared
#   column.
# - Each count() is followed by arrange(song_id, ...), which fully determines
#   the row order, so no sort is requested inside count().
# - NOTE(review): total_words already has a column `n`; the plots further
#   down read these tallies from `nn`, so count() presumably names its output
#   `nn` here - confirm against the installed dplyr.
# - NOTE(review): song_id is only available to arrange() while the data is
#   still grouped by song_id - confirm the grouping survives the join.

# NRC: one tally per sentiment category.
s_analysis <- total_words %>%
    inner_join(nrc_sent, by = "word")
sentiments_full <- s_analysis %>%
    count(sentiment) %>%
    arrange(song_id, sentiment)

# Bing: one tally per sentiment label.
s_analysis_bing <- total_words %>%
    inner_join(bing_sent, by = "word")
sentiments_bing <- s_analysis_bing %>%
    count(sentiment) %>%
    arrange(song_id, sentiment)

# AFINN: one tally per numeric score.
s_analysis_afinn <- total_words %>%
    inner_join(afinn_sent, by = "word")
sentiments_afinn <- s_analysis_afinn %>%
    count(score) %>%
    arrange(song_id, score)

# Plots of Sentiment Analysis
# Eight-colour palette, black first (the name suggests a colour-blind
# friendly palette).
cbbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
# To use it for fills, add `scale_fill_manual(values = cbbPalette)` to a plot
# whose fill variable is discrete and has at most eight levels.
# This used to be a stand-alone statement referring to the undefined name
# `cbPalette`; it was attached to no plot and only printed a scale object.
# Styling shared by the three sentiment bar charts: bold x tick labels
# rotated 45 degrees, a red bold-italic 9pt plot title and black bold 9pt
# axis titles.
sentiment_bar_theme <- theme(
    axis.text.x = element_text(angle = 45, hjust = 1, face = "bold"),
    plot.title = element_text(color = "red", size = 9, face = "bold.italic"),
    axis.title.x = element_text(color = "black", size = 9, face = "bold"),
    axis.title.y = element_text(color = "black", size = 9, face = "bold")
)

# NRC sentiment categories.
ggplot(sentiments_full, aes(x = sentiment, y = nn, fill = sentiment)) +
    geom_col() +
    sentiment_bar_theme

# AFINN scores.
ggplot(sentiments_afinn, aes(x = score, y = nn, fill = score)) +
    geom_col() +
    sentiment_bar_theme

# Bing sentiment labels.
ggplot(sentiments_bing, aes(x = sentiment, y = nn, fill = sentiment)) +
    geom_col() +
    sentiment_bar_theme

# Most common positive and negative words
# Count each (word, sentiment) pair from the Bing lexicon. The join key is
# explicit so the join cannot silently match on another shared column.
bing_word_counts <- l_analyse %>%
    inner_join(get_sentiments("bing"), by = "word") %>%
    count(word, sentiment, sort = TRUE) %>%
    ungroup()

# Bar chart of the top five rows per sentiment, ranked by the count `n`.
# The ranking column is named explicitly instead of relying on top_n()
# falling back to whichever column happens to be last.
bing_word_counts %>%
    group_by(sentiment) %>%
    top_n(5, n) %>%
    ungroup() %>%
    mutate(word = reorder(word, n)) %>%
    ggplot(aes(word, n, fill = sentiment)) +
    geom_col(show.legend = FALSE) +
    facet_wrap(~sentiment, scales = "free_y") +
    labs(y = "Contribution to sentiment",
         x = NULL) +
    coord_flip()

# Word cloud of the joy/anger words with stop words removed.
twords <- total_words[c("word", "n")]
# The piped data frame is wordcloud2()'s data argument. The bare `word` that
# used to follow it was not a column reference: it was passed positionally
# into the next free parameter of wordcloud2(), so it has been removed.
# NOTE(review): count(word) tallies rows per word and ignores the existing
# `n`; if total frequency is intended, count(word, wt = n) may be what is
# wanted - confirm.
twords %>%
    anti_join(stop_words, by = "word") %>%
    count(word) %>%
    wordcloud2(size = 1.6, color = "random-light", backgroundColor = "black")